* ---------------------------------------------------------------------------
* Setup: load the student-by-term base file and attach the school-level
* files (school list, language, tuition, school index), all keyed on school1.
* The global macro dir must hold the project root folder before this runs.
* ---------------------------------------------------------------------------
clear all
set more off
* The former "set mem" line was removed: memory is managed automatically by
* every Stata release that accepts the saveold version() option used below,
* so the setting was only ever ignored.

* Stop early with a clear message instead of changing into "\Raw_Data"
* relative to whatever drive happens to be current.
if `"$dir"' == "" {
    display as error "global macro dir is not set; define the project root folder first"
    exit 198
}

cd "$dir\Raw_Data"

use base_data.dta, clear

* base_data may carry a leftover _merge from an earlier step; remove it if
* present, but do not abort when it is absent.
capture drop _merge

* nogenerate keeps each merge from creating _merge, so no drop is needed
* between consecutive merges.
merge m:1 school1 using "Schools12.dta", nogenerate
merge m:1 school1 using languagetest.dta, nogenerate

* Harmonise the language labels, then build one indicator per distinct
* nonmissing value (lang1, lang2, ... follow the sorted order of the values).
* NOTE(review): later code treats lang1 as dual, lang2 as French and lang3 as
* English; confirm that the sorted values of language give that order.
replace language = "Dual" if language == "dual"
replace language = ""     if language == "N/A"
tab language, gen(lang)

merge m:1 school1 using tuitiontest.dta, nogenerate
merge m:1 school1 using "school_indexf12.dta", nogenerate


* ---------------------------------------------------------------------------
* Sample restrictions. Each student-level filter flags the qualifying rows,
* spreads the flag to every row of the same ID, and keeps flagged students.
* ---------------------------------------------------------------------------
sort ID Term

* Students with at least one row marked "never enrolled as FR".
tempvar rowflag anyflag
gen `rowflag' = (FR_nonFR_status == "never enrolled as FR")
by ID: egen `anyflag' = max(`rowflag')
keep if `anyflag' == 1
drop `rowflag' `anyflag'

* Students with at least one row marked "SO at some point".
tempvar rowflag anyflag
gen `rowflag' = (SO_status == "SO at some point")
by ID: egen `anyflag' = max(`rowflag')
keep if `anyflag' == 1
drop `rowflag' `anyflag'

* Row-level filter on Class: only the codes listed below are retained
* (the original comment describes this as removing graduate studies).
tab Class
local keep_classes A1 A2 A3 A4 B1 B2 B3 ///
    E1 E2 E3 E4 F1 F2 F3 ///
    G1 G2 G3 G4 SO JR SR ///
    L1 L2 L3 L4 N2 N3 N4 ///
    P1 P2 P3 PH IE T1 T2 T3 T4 ///
    R1 R2 R3 R4 R5
tempvar class_ok
gen byte `class_ok' = 0
foreach code of local keep_classes {
    replace `class_ok' = 1 if Class == "`code'"
}
keep if `class_ok' == 1
drop `class_ok'


 
* ---------------------------------------------------------------------------
* SAT clean-up and GPA measures, then reduction of the course-level rows.
* ---------------------------------------------------------------------------

* Give every row of a student that student's highest recorded SAT scores.
bysort ID: egen temp_mat    = max(math_sat)
bysort ID: egen temp_verbal = max(verbal_sat)
replace math_sat   = temp_mat
replace verbal_sat = temp_verbal

* Numeric grade; force turns any non-numeric Grade entry into missing.
destring Grade, generate(grade) force

* Standardise each grade within its course offering (Term x Subject x
* CourseNum). The result is missing when the within-course sd is missing
* or zero.
bysort Term Subject CourseNum: egen mean_coursegrade = mean(grade)
bysort Term Subject CourseNum: egen sd_coursegrade   = sd(grade)
gen st_coursegrade = (grade - mean_coursegrade) / sd_coursegrade

* Calendar year = first four characters of Term written as text.
tostring Term, generate(str_geocode)
gen Year = substr(str_geocode, 1, 4)
destring Year, generate(year) force

* GPA averages: per semester and per year, then over all of a student's rows.
* The "nostand" versions average the raw numeric grade.
bysort ID Term: egen SemGPA         = mean(st_coursegrade)
bysort ID year: egen YearGPA        = mean(st_coursegrade)
bysort ID year: egen YearGPAnostand = mean(grade)

bysort ID: egen TotalGPA        = mean(st_coursegrade)
bysort ID: egen TotalGPAnostand = mean(grade)

* Remove the course-level columns and then any rows that have become
* exact duplicates of one another.
drop CourseNum Subject SECTION_CREDIT_HOURS Grade instructor_sex ///
    sd_coursegrade st_coursegrade mean_coursegrade grade dummy_withdrew
duplicates drop

* ---------------------------------------------------------------------------
* First major, science-major flag, per-student row counters, legacy flag,
* and exclusion of selected major codes.
* ---------------------------------------------------------------------------

* First major: fill the string major across each student's rows.
* stripolate is not an official Stata command (presumably the SSC package;
* confirm it is installed before running).
bysort ID: stripolate first_major2 year, gen(firstrealmajor2) groupwise
bysort ID: stripolate first_major  year, gen(firstrealmajor)  groupwise

* major_science: 1 for the listed codes, 0 for any other non-empty first
* major, missing when the first major is empty.
gen major_science = .
foreach code in MATA MATS APPM PHYS CMPS CHEM GEOL ///
                CONS EECE EELE ECIV ECMP EMCH CHME INDE {
    replace major_science = 1 if firstrealmajor2 == "`code'"
}
replace major_science = 0 if firstrealmajor2 != "" & major_science == .

* Row counter within student in Term order, plus markers for the last row
* and for the earliest row observed in 2004 or later.
* NOTE(review): rows of one ID sharing a Term are numbered in arbitrary order.
sort ID Term
bysort ID (Term): gen bla = _n
bysort ID: egen maxbla = max(bla)
bysort ID: egen minbla = min(bla) if year >= 2004
gen newbla  = 1 if minbla == bla
gen lastsem = 1 if bla == maxbla

* legacy = 1 when lgcy_desc encodes to a nonmissing code, 0 otherwise.
encode lgcy_desc, gen(ny2)
gen legacy = (ny2 != .)
drop ny2

* Drop rows whose current major is one of the codes below. The counters
* above were built before these drops and are not renumbered afterwards.
foreach code in CHME CONS ELEM ELIT ELNG GEOL INDE LDAR MATA MAUD ///
                MCOM MIMG SOAN STAT MJRL MJLS MJLA {
    drop if MAJR_CODE == "`code'"
}
*****************************************************************************
*																			*
*				Treatment variables and Controls							*
*																			*
*****************************************************************************
*Create Treatment Variables
*Create year ID for advisor
* One identifier per distinct (advisor, year) pair. The earlier arithmetic
* sum fake_advisor_id + year was not unique: for example advisor 5 in 2006
* and advisor 6 in 2005 both produced 2011. group() assigns consecutive
* integers 1, 2, ... to the observed pairs and returns missing when either
* component is missing, as the sum did.
egen advisorIDyear = group(fake_advisor_id year)


* Student-level averages of the muslim and christian variables.
bys ID: egen mmuslim=mean(muslim)
bys ID: egen mchristian=mean(christian)

* Drop students whose average is exactly one half or lies strictly between
* 1 and 98, and those whose average equals 98.
* NOTE(review): 98 looks like a special code in the source data - confirm.
drop if mmuslim==.5 | (mmuslim>1 & mmuslim<98)
drop if mchristian==.5 | (mchristian>1 & mchristian<98)


drop if mmuslim==98 | mchristian==98

* School-type indicators. secular_school is the residual category, so it
* also covers students whose averages above are missing.
gen islam_school=(mmuslim==1)
gen christ_school=(mchristian==1)
gen secular_school=(islam_school!=1 & christ_school!=1)


* ---------------------------------------------------------------------------
* Leave-one-out peer shares by school type, within advisor-term groups and
* within department-term groups (suffix 2), plus interaction terms.
* A share is (group sum - own value) / (group size - 1), so it is missing
* for a group of one.
* ---------------------------------------------------------------------------

* Number of other rows in the same group.
bysort fake_advisor_id Term: gen newtotal  = _N - 1
bysort Dept Term:            gen newtotal2 = _N - 1

* Parallel lists: school indicator prefix, sum-variable tag, leave-out tag.
local schools islam christ secular
local sumtags islam christ sec
local outtags mus   chris  sec

forvalues k = 1/3 {
    local school : word `k' of `schools'
    local s      : word `k' of `sumtags'
    local o      : word `k' of `outtags'

    * Advisor-term group.
    bysort fake_advisor_id Term: egen sum_`s' = sum(`school'_school)
    gen sum`o'1out = sum_`s' - `school'_school
    gen prop_`s'   = sum`o'1out / newtotal

    * Department-term group.
    bysort Dept Term: egen sum_`s'2 = sum(`school'_school)
    gen sum`o'1out2 = sum_`s'2 - `school'_school
    gen prop_`s'2   = sum`o'1out2 / newtotal2
}

* Own school type x advisor-group peer share, for all nine combinations.
local owners  christian     islamic      secular
local ownvars christ_school islam_school secular_school
forvalues k = 1/3 {
    local own    : word `k' of `owners'
    local ownvar : word `k' of `ownvars'
    foreach peer in christ islam sec {
        gen `own'`peer'2 = `ownvar' * prop_`peer'
    }
}

* Advisor-group size (including the student) taken from the row with
* bla==1, copied to all rows of the student.
tempvar size_first
gen `size_first' = newtotal + 1 if bla == 1
bysort ID: egen group_size = max(`size_first')
drop `size_first'

* Peer shares standardised over the whole sample.
egen stmusl = std(prop_islam)
egen stcath = std(prop_christ)
egen stsec  = std(prop_sec)

* Own school type x standardised peer share.
gen christianchrist3 = christ_school * stcath
gen islamicchrist3   = islam_school  * stcath

gen christianislam3 = christ_school * stmusl
gen islamicislam3   = islam_school  * stmusl
 

  * Indicator sets for student gender, advisor gender and advisor rank.
  * Each tabulate creates one dummy per distinct value (stub1, stub2, ...).
  tabulate StudGender, generate(sex)
  tabulate ADV_GENDER, generate(advsex)

  * An empty Rank becomes its own category instead of being left out.
  replace Rank = "NA" if missing(Rank)
  tabulate Rank, generate(advrank)

  * Student-level mean SAT scores.
  bysort ID: egen satmat  = mean(math_sat)
  bysort ID: egen satverb = mean(verbal_sat)
	

* ---------------------------------------------------------------------------
* Mean math SAT of each school type within an advisor-term group.
* Step 1 averages satmat over members of the given type only; step 2 copies
* that average to every row of the group; the result is then interacted
* with the christian and islamic school indicators.
* ---------------------------------------------------------------------------
local tags    chris         musl         sec
local members christ_school islam_school secular_school
local stubs   qualchris     qualmusl     qualitysec

forvalues k = 1/3 {
    local tag    : word `k' of `tags'
    local member : word `k' of `members'
    local stub   : word `k' of `stubs'

    bysort Term fake_advisor_id: egen qual`tag'    = mean(satmat) if `member' == 1
    bysort Term fake_advisor_id: egen quality`tag' = mean(qual`tag')

    gen christ_`stub' = quality`tag' * christ_school
    gen musl_`stub'   = quality`tag' * islam_school
}


* ---------------------------------------------------------------------------
* Advisor-term peer controls: gender, math SAT, school language, tuition.
* Leave-one-out values are (group sum - own value) / newtotal, where
* newtotal is the number of other rows in the advisor-term group.
* NOTE(review): egen sum() counts missing values as zero while newtotal
* counts every other row, so peers with missing data pull these averages
* towards zero - confirm that this is intended.
* ---------------------------------------------------------------------------

* Single gender indicator: 1 for the first StudGender category, 0 for the
* second, missing otherwise.
gen sex = cond(sex1 == 1, 1, cond(sex2 == 1, 0, .))

bysort fake_advisor_id Term: egen sum_sex = sum(sex)
gen sumsex1out  = sum_sex - sex
gen meanpeersex = sumsex1out / newtotal
gen christmeanpeersex = christ_school * meanpeersex
gen muslmeanpeersex   = islam_school  * meanpeersex

* Peer math SAT.
bysort fake_advisor_id Term: egen sum_qual = sum(satmat)
gen sumqual1out = sum_qual - satmat
gen peerqual    = sumqual1out / newtotal

gen christ_peerqual = peerqual * christ_school
gen musl_peerqual   = peerqual * islam_school

* Peer language shares, defined only for rows with a known language.
* NOTE(review): the denominator uses the full group size, including rows
* whose language is empty - confirm.
bysort fake_advisor_id Term: gen newtotal3 = _N - 1 if language != ""

local dummies lang2  lang3 lang1
local labels  french eng   dual
forvalues k = 1/3 {
    local d   : word `k' of `dummies'
    local lab : word `k' of `labels'

    bysort fake_advisor_id Term: egen sum_`d' = sum(`d') if language != ""
    gen sum`d'1out = sum_`d' - `d'
    gen prop_`lab' = sum`d'1out / newtotal3

    gen christ_`lab' = prop_`lab' * christ_school
    gen musl_`lab'   = prop_`lab' * islam_school
}

* Mean tuition in the advisor-term group (own value included).
bysort fake_advisor_id Term: egen meantuition = mean(tuitionfees)

gen christ_tuit = meantuition * christ_school
gen musl_tuit   = meantuition * islam_school

* High-tuition school flag and missing-tuition flag.
gen richschool = (tuitionfees > 9000000 & tuitionfees != .)
gen missingtu  = (tuitionfees == .)

* Share of peers from high-tuition schools.
bysort fake_advisor_id Term: egen sum_tu = sum(richschool)
gen sumtu1out = sum_tu - richschool
gen prop_rich = sumtu1out / newtotal

gen christ_rich = prop_rich * christ_school
gen musl_rich   = prop_rich * islam_school

* Share of peers with missing tuition.
bysort fake_advisor_id Term: egen sum_tumis = sum(missingtu)
gen sumtumis1out = sum_tumis - missingtu
gen prop_miss    = sumtumis1out / newtotal

gen christ_tumis = prop_miss * christ_school
gen musl_tumis   = prop_miss * islam_school

*****************************************************************************
*																			*
*						Outcome Variables									*
*																			*
*****************************************************************************

sort ID Term

* Early GPA outcomes: take the value on the row with the given counter bla
* and copy it to every row of the student.
* Each spec is: new variable, source variable, value of bla.
tempvar pick
foreach spec in "GPA_s1 SemGPA 1" "GPA_s2 SemGPA 2" ///
                "GPA_y1 YearGPA 1" "GPA_sumstat YearGPAnostand 1" {
    tokenize `spec'
    gen `pick' = `2' if bla == `3'
    bysort ID: egen `1' = max(`pick')
    drop `pick'
}

* dropout_1st = 1 when the largest nb_semesters of the student is below 4.
tempvar maxsem
bysort ID: egen `maxsem' = max(nb_semesters)
gen dropout_1st = (`maxsem' < 4)
drop `maxsem'

* First and last calendar year of each student.
tempvar y_first y_last
gen `y_first' = year if bla == 1
gen `y_last'  = year if lastsem == 1
bysort ID: egen firstyear = max(`y_first')
bysort ID: egen lastyear  = max(`y_last')
drop `y_first' `y_last'

* Second year: earliest year different from firstyear among rows 2 to 4.
tempvar y_second
gen `y_second' = year if inrange(bla, 2, 4) & year != firstyear
bysort ID: egen secondyear = min(`y_second')
drop `y_second'

* Major in the first, second and last year, filled across the rows of each
* student with stripolate.
gen majorfirst  = MAJR_CODE if year == firstyear
gen majorsecond = MAJR_CODE if year == secondyear
gen majorgrad   = MAJR_CODE if year == lastyear

bysort ID: stripolate majorfirst  Term, gen(firstmajor)  groupwise
bysort ID: stripolate majorsecond Term, gen(secondmajor) groupwise
bysort ID: stripolate majorgrad   Term, gen(gradmajor)   groupwise

bysort ID: egen total_credits = sum(TotalSemestercredits)
summarize total_credits, detail

* Eng flags the engineering codes; collaborative covers those codes plus
* the additional majors in the second list.
gen Eng = 0
foreach code in CONS EECE EELE ECIV ECMP EMCH CHME INDE {
    replace Eng = 1 if firstrealmajor2 == "`code'"
}

gen collaborative = Eng
foreach code in BADM CMPS ECON MATS MATA PHYS STAT {
    replace collaborative = 1 if firstrealmajor2 == "`code'"
}

* Graduation: a row in the last year with one of the Class codes below.
* graduate is defined only for students whose first year is 2012 or earlier.
gen grad = 0
foreach code in A4 B3 E4 F3 G4 L4 N4 P3 R5 SR T4 {
    replace grad = 1 if year == lastyear & Class == "`code'"
}
bysort ID: egen graduate = max(grad) if firstyear <= 2012

gen time_to_graduate = lastyear - firstyear + 1 if graduate == 1

* Graduation within 4 or 6 years for Eng students, and within one year
* less (3 or 5) for everyone else. Set to missing when first year > 2012.
forvalues horizon = 4(2)6 {
    gen grad`horizon' = (graduate == 1 & ///
        time_to_graduate <= cond(Eng == 1, `horizon', `horizon' - 1))
    bysort ID: egen gradin`horizon' = max(grad`horizon')
    replace gradin`horizon' = . if firstyear > 2012
    drop grad`horizon'
}

* Overall GPA measures, kept for graduates only.
tempvar gpa_std gpa_raw
gen `gpa_std' = TotalGPA        if graduate == 1
gen `gpa_raw' = TotalGPAnostand if graduate == 1
bysort ID: egen GPA_grad         = max(`gpa_std')
bysort ID: egen GPA_grad_sumstat = max(`gpa_raw')
drop `gpa_std' `gpa_raw'

* Change of major between first and second year, and between first year
* and graduation (graduates only).
* NOTE(review): transfer2 equals 1 whenever secondmajor is empty while
* firstmajor is not, e.g. for a student with no second year - confirm.
gen transfer2 = (firstmajor != secondmajor)
gen transfer  = (firstmajor != gradmajor) if graduate == 1

* One indicator per current major code.
tabulate MAJR_CODE, generate(majour)

* ---------------------------------------------------------------------------
* Final variable selection, reduction to one row per student, and export.
* The retained variables are listed by theme; keep receives them all at once.
* ---------------------------------------------------------------------------
local v_student ID Term StudGender FR_nonFR_status MAJR_CODE Dept SO_status ///
    math_sat verbal_sat admission total_sat composite av1 scale1 av2 scale2 ///
    applied_faid father_job lgcy_desc birth_year city Class legacy ///
    school1 school2 tuitionfees

local v_treat secular_school islam_school christ_school ///
    prop_islam prop_christ prop_sec prop_islam2 prop_christ2 prop_sec2 ///
    stmusl stcath christianchrist3 christianislam3 islamicchrist3 islamicislam3 ///
    fake_advisor_id advisorIDyear group_size newtotal

local v_controls sex* advsex* advrank* Rank ADV_GENDER satmat satverb ///
    meanpeersex christmeanpeersex muslmeanpeersex ///
    qualitysec qualitychris qualitymusl ///
    christ_qualchris musl_qualchris christ_qualmusl musl_qualmusl ///
    christ_qualitysec musl_qualitysec ///
    peerqual christ_peerqual musl_peerqual ///
    prop_french christ_french musl_french ///
    prop_eng christ_eng musl_eng ///
    prop_dual christ_dual musl_dual ///
    meantuition christ_tuit musl_tuit ///
    prop_rich christ_rich musl_rich ///
    prop_miss christ_tumis musl_tumis

local v_outcomes GPA_s1 GPA_s2 GPA_y1 GPA_sumstat GPA_grad_sumstat GPA_grad ///
    dropout_1st Eng collaborative graduate firstyear lastyear ///
    time_to_graduate gradin4 gradin6 transfer transfer2 majour*

local v_region non_div christ_maj sunni_share greek_share maronite_share ///
    alawite_share shia_share greek_catholic_share arm_orth_share ///
    minorities_share arm_cath_share evangelic_share druze_share ///
    frac_simple_index christian_share frac_index muslim_share ///
    frac_group_index Kada

keep `v_student' `v_treat' `v_controls' `v_outcomes' `v_region'

* Keep the earliest remaining Term row of every student.
bysort ID (Term): keep if _n == 1

* Numeric, value-labelled versions of the string identifiers.
encode Dept,       gen(Dept1)
encode Rank,       gen(Rank1)
encode ADV_GENDER, gen(ADVGENDER)


cd "$dir\Final_Data"

saveold Final_sample.dta,version(12) replace

* Companion file with the peer controls only. The data already hold one row
* per ID, so collapsing by ID returns each value unchanged. Missing language
* shares are flagged in missing_lang and then set to zero together with
* their interactions.
preserve
collapse (mean) peerqual christ_peerqual musl_peerqual ///
    meanpeersex christmeanpeersex muslmeanpeersex ///
    prop_french christ_french musl_french ///
    prop_eng christ_eng musl_eng ///
    meantuition christ_tuit musl_tuit, by(ID)
gen missing_lang = (prop_eng == . & prop_french == .)
foreach lang in eng french {
    * Interactions first: their condition reads the share before it is filled.
    replace christ_`lang' = 0 if prop_`lang' == .
    replace musl_`lang'   = 0 if prop_`lang' == .
    replace prop_`lang'   = 0 if prop_`lang' == .
}
saveold Controls-Merger.dta, version(12) replace 
restore

